*Generates TABLE 2: Summary of BCBS Patient Data by Outpatient Provider
*Version 15 Stata

set more off

*Establish working directories
local d3 Geographic Data
local d5 Antidepressant Data
local d22 Entropy Data

*******************************************************************************
*******************************************************************************
*PANEL A
*******************************************************************************
*******************************************************************************

*Bring in data (basically raw antidepressant data)
cd `d5'
use if year==2013 using provider_product_cat_allyears.dta, clear

*******************************************************************************
*SAMPLE SELECTION
*Flag missing age/sex values
*******************************************************************************
generate exclude_values = cond(patient_age=="UNK" | patient_gender=="U", 1, cond(patient_age=="00-02" | patient_age=="03-09", 2, 0))
drop if exclude_values!=0


*Collape records into provider-year records
*NOTE: We do this to reduce computational requirements for the rest of the code.
collapse (sum) tot_scripts, by(provider_id year)

*******************************************************************************
*SAMPLE SELECTION
*Keep only physicians who appear in the AMA file + valid geographic information
*NOTE: The variable county_recode provides the "raw" county id for each provider.
*******************************************************************************
cd `d3'
merge m:1 provider_id using verified_county_all.dta
keep if _merge==3
drop _merge
rename county_recode county_raw

*******************************************************************************
*SAMPLE SELECTION
*Define specialty, then exclude "exclude" specialties
*******************************************************************************
generate specialty=""
replace specialty = "Exclude" if provider_speciality_descrip=="DENTIST" | provider_speciality_descrip=="VETERINARIAN" | provider_speciality_descrip=="NOT APPLICABLE" | provider_speciality_descrip=="UNSPECIFIED"
drop if specialty=="Exclude"

*Redefine other specialties
replace specialty="Primary Care" if provider_speciality_descrip=="FAMILY MEDICINE" | provider_speciality_descrip=="INTERNAL MEDICINE" | provider_speciality_descrip=="GENERAL PRACTICE" | provider_speciality_descrip=="PEDIATRICS" | provider_speciality_descrip=="INTERNAL MEDICINE/PEDIATRICS" | provider_speciality_descrip=="GERIATRIC MEDICINE (INTERNAL MEDICIN" | provider_speciality_descrip=="HOSPITALIST" | provider_speciality_descrip=="GERIATRIC MEDICINE (FAMILY MEDICINE)" | provider_speciality_descrip=="GENERAL PREVENTIVE MEDICINE" | provider_speciality_descrip=="SLEEP MEDICINE" | provider_speciality_descrip=="INTERNAL MEDICINE/FAMILY MEDICINE" | provider_speciality_descrip=="DEVELOPMENTAL/BEHAVIORAL PEDIATRICS" | provider_speciality_descrip=="ADOLESCENT MEDICINE (PEDIATRICS)" | provider_speciality_descrip=="INTERNAL MEDICINE/PREVENTIVE MEDICIN" | provider_speciality_descrip=="ADOLESCENT MEDICINE (INTERNAL MEDICI" | provider_speciality_descrip=="INTERNAL MEDICINE/EMERGENCY MEDICINE"

replace specialty="Psychiatry" if provider_speciality_descrip=="PSYCHIATRY" | provider_speciality_descrip=="CHILD & ADOLESCENT PSYCHIATRY" | provider_speciality_descrip=="GERIATRIC PSYCHIATRY" | provider_speciality_descrip=="ADDICTION PSYCHIATRY" | provider_speciality_descrip=="FORENSIC PSYCHIATRY" | provider_speciality_descrip=="PSYCHIATRY/NEUROLOGY" | provider_speciality_descrip=="ADDICTION MEDICINE" | provider_speciality_descrip=="PED/PSYCH/CHILD & ADOLESCENT PSYCH" | provider_speciality_descrip=="INTERNAL MEDICINE/PSYCHIATRY" | provider_speciality_descrip=="PSYCHOANALYSIS" | provider_speciality_descrip=="PSYCHIATRY/FAMILY MEDICINE" | provider_speciality_descrip=="PAIN MEDICINE (PSYCHIATRY)" | provider_speciality_descrip=="BEHAVIORAL HEALTH & SOCIAL SERVICES"

replace specialty = "Non Physician" if provider_speciality_descrip=="NURSE PRACTITIONER" | provider_speciality_descrip=="PHYSICIAN ASSISTANT" | provider_speciality_descrip=="CLINICAL NURSE SPECIALIST" | provider_speciality_descrip=="PSYCHOLOGY" | provider_speciality_descrip=="REGISTERED NURSE" | provider_speciality_descrip=="ADVANCED REGISTERED NURSE" | provider_speciality_descrip=="LICENSED PRACTICAL NURSE" | provider_speciality_descrip=="LICENSED VOCATIONAL NURSE"

replace specialty = "Other Medical" if provider_speciality_descrip=="EMERGENCY MEDICINE" | provider_speciality_descrip=="CARDIOVASCULAR DISEASE" | provider_speciality_descrip=="PHYSICAL MEDICINE & REHABILITATION" | provider_speciality_descrip=="OBSTETRICS & GYNECOLOGY" | provider_speciality_descrip=="GENERAL SURGERY" | provider_speciality_descrip=="NEPHROLOGY" | provider_speciality_descrip=="PULMONARY DISEASE" | provider_speciality_descrip=="INFECTIOUS DISEASE" | provider_speciality_descrip=="ANESTHESIOLOGY" | provider_speciality_descrip=="GASTROENTEROLOGY" | provider_speciality_descrip=="HEMATOLOGY/ONCOLOGY" | provider_speciality_descrip=="RHEUMATOLOGY" | provider_speciality_descrip=="MEDICAL ONCOLOGY" | provider_speciality_descrip=="ENDOCRINOLOGY, DIABETES & METABOLISM" | provider_speciality_descrip=="ORTHOPEDIC SURGERY" | provider_speciality_descrip=="DERMATOLOGY" | provider_speciality_descrip=="PULMONARY CRITICAL CARE MEDICINE" | provider_speciality_descrip=="UROLOGY" | provider_speciality_descrip=="OTOLARYNGOLOGY" | provider_speciality_descrip=="HEMATOLOGY (INTERNAL MEDICINE)" | provider_speciality_descrip=="PLASTIC SURGERY" | provider_speciality_descrip=="GYNECOLOGY" | provider_speciality_descrip=="CRITICAL CARE MEDICINE (IM)" | provider_speciality_descrip=="RADIATION ONCOLOGY" | provider_speciality_descrip=="OBSTETRICS"

replace specialty="Other" if specialty==""

*******************************************************************************
*SAMPLE SELECTION
*Generate flag if county info missing, then exclude missing county physicians
*******************************************************************************
generate county_missing = cond(county_raw==., 1, 0)
drop if county_missing==1
drop county_missing

*Count up providers by specialty
generate psychiatrist = specialty=="Psychiatry"
generate gp = specialty=="Primary Care"
generate medical = specialty=="Other Medical"

generate count = 1
by year county_raw, s: egen providers = sum(count)
by year county_raw, s: egen psychiatrists = sum(psychiatrist)
by year county_raw, s: egen gps = sum(gp)
by year county_raw, s: egen medicals = sum(medical)

*Collapse data into specialty-county-year records
*NOTE: We do this to keep the data manageable; currently, the data are in 
*	provider, product, payment type, gender, age year record format.
*IMPORTANT: These data include missing age and gender scripts whereas our 
*	final sample for regressions exclude these.
collapse (sum) tot_scripts (first) providers-medicals, by(year specialty county_raw)

*Reshape data to wide - one record per county
encode specialty, gen(spec) label(specialty)
drop specialty
numlabel, add
tab spec
reshape wide tot_scripts, i(year county_raw) j(spec)

*Replace missing values, e.g., some couties won't have any scripts by psychiatrists
forvalues i=1(1)5 {
replace tot_scripts`i'=0 if tot_scripts`i'==.
}

*Calculate scripts by specialty
generate tot_scripts = tot_scripts1+tot_scripts2+tot_scripts3+tot_scripts4+tot_scripts5
generate psych_scripts = tot_scripts5
generate gp_scripts = tot_scripts4
generate other_scripts = tot_scripts3
drop tot_scripts1-tot_scripts5

*******************************************************************************
*Table 1, Row 1: #Prescriptions
*******************************************************************************
preserve

foreach var of varlist *scripts {
by year, sort: egen long `var'_tot = sum(`var')
}
by year, s: keep if _n==1
keep year *tot

label variable year "Year"
label variable tot_scripts_tot "# Prescriptions"
label variable gp_scripts_tot "# Prescriptions by GPs"
label variable psych_scripts_tot "# Prescriptions by Psychiatrists"
label variable other_scripts_tot "# Prescriptions by Other Medicals"

*convert to millions
foreach var of varlist tot_scripts_tot gp_scripts_tot psych_scripts_tot other_scripts_tot {
replace `var' = `var'/1000000
}
list tot_scripts_tot gp_scripts_tot psych_scripts_tot other_scripts_tot

restore

*******************************************************************************
*Table 1, Row 2: #Providers
*******************************************************************************
preserve

collapse (sum) providers-medicals

list providers gps psychiatrists medicals

restore

*******************************************************************************
*Table 1, Row 3: #Prescriptions/Provider
*******************************************************************************
preserve

*Collapse data to one record per year
collapse (sum) providers-pop2010census, by(year)

*NUMBER OF PRESCRIPTIONS PER PROVIDER
generate scripts_pp = tot_scripts / providers

*NUMBER OF PRESCRIPTIONS PER GP
generate scripts_gp = gp_scripts / gps

*NUMBER OF PRESCRIPTIONS PER PSYCHIATRIST
generate scripts_psych = psych_scripts / psychiatrists

*NUMBER OF PRESCRIPTIONS PER OTHER MEDICAL
generate scripts_other = other_scripts / medicals

*Parse variables
keep year scripts*
label variable year "Year"
label variable scripts_pp "# Prescriptions per Provider"
label variable scripts_gp "# Prescriptions per GP"
label variable scripts_psych "# Prescriptions per Psychiatrist"
label variable scripts_other "# Prescriptions per Other Medical"

list scripts_pp scripts_gp scripts_psych scripts_other

restore

*******************************************************************************
*******************************************************************************
*PANELS B and C
*******************************************************************************
*******************************************************************************

*Bring in data
cd `d22'
use if year==2013 using entropy_data_final_we.dta, clear

*Calculate entropy by specialty and cohort
*NOTE: We keep only physicians, with valid grad years, and rank available.
*Select drug product definition
local drug product3

*Pull provider-level information for merging later
*NOTE: We do this to ease computational burden.
preserve
by provider_id, sort: keep if _n==1
keep provider_id provider_yob-specialty
tempfile provider
save `provider', replace
restore

*Collapse data into product, provider, and year
*NOTE: These data are in patient-product-provider-year level, but we don't
*	need that level of granularity for summary statistics
collapse (sum) tot_scripts new_scripts, by(provider_id year `drug')

preserve

*Hard code global mental health drug products across years
local drug_products=33

*Generate total yearly prescriptions by county
by provider_id year, sort: egen total = sum(tot_scripts)

*Generate entropy measures
generate share = tot_scripts/total
generate temp2 = -(share*ln(share))/ln(`drug_products')
by provider_id year, sort: egen entropy_all_global=sum(temp2)

*Keep one record per provider/year
by provider_id year, sort: keep if _n==1

*parse variables
keep provider_id year entropy_all_global total

tempfile temp2
save `temp2', replace

restore

*Calculate share of new prescriptions to overall favorite

preserve

by provider_id year, s: egen double tot=sum(tot_scripts)
generate share = tot_scripts/tot
by provider_id year, s: egen max = max(share)
generate max2 = float(share)==float(max)
drop tot share
by provider_id year, s: egen double tot=sum(new_scripts)
generate share = new_scripts/tot
keep if max2==1
by provider_id year, s: keep if _n==1

keep provider_id year share

tempfile temp3
save `temp3', replace

restore

*Combine two files
use `temp2', clear
merge 1:1 provider_id year using `temp3'
drop _merge

*Merge back in physician characteristics
merge m:1 provider_id using `provider'
keep if _merge==3
drop _merge

*Parse variables
keep provider_id year entropy_all_global total provider_grad_year specialty provider_med_school share

*Smooth total variable
replace total = round(total)

*******************************************************************************
*SAMPLE SELECTION
*Include only those providers with valid grad years and define graduation cohort
*NOTE: We keep only physicians, with valid grad years, and rank available.
*******************************************************************************
destring provider_grad_year, replace
keep if provider_grad_year!=.

*Define cohorts
generate cohort=""
replace cohort = "1975" if provider_grad_year<=1975
replace cohort = "1976-1985" if provider_grad_year>1975 & provider_grad_year<=1985
replace cohort = "1986-1995" if provider_grad_year>1985 & provider_grad_year<=1995
replace cohort = "1996+" if cohort==""

*******************************************************************************
*SAMPLE SELECTION
* Include only physicians, i.e., exclude non-physicians, 
*	with valid rankings, where we merge in medical school rank information
*******************************************************************************
keep if specialty!="Non Physician"

*Merge in school rank
cd `d3'
merge m:1 provider_med_school using rankings.dta
drop if _merge==2

*Fix rankings (ad hoc) - some are not ranked in rank file but should be
generate missing = cond(provider_med_school=="",1,0)
*tab missing _merge
*tab provider_med_school if missing==0 & _merge==1, sort

*Adjust incorrect rank values; merged with Drexel
replace rank = "88" if provider_med_school == "HAHNEMANN UNIVERSITY SCHOOL OF MEDICINE"
replace rank = "88" if provider_med_school == "MEDICAL COLLEGE OF PENNSYLVANIA"
replace rank = "44" if provider_med_school == "UNIV OF CA, IRVINE, CA COLL OF MED, IRVINE CA"
replace _merge=3 if rank=="88" | rank=="44"

*Handle people with missing rankings; are they all foreign?
replace rank = "Foreign" if rank=="" & provider_med_school!="UNIVERSITY OF MEDICINE"
replace rank = "Unknown" if rank=="" & provider_med_school=="UNIVERSITY OF MEDICINE"

*Define region
generate region2 = ""
replace region2 = "US" if region==99
replace region2 = "Caribbean" if region==100
replace region2 = "Canada" if region==101
replace region2 = "Central America" if region==102
replace region2 = "South America" if region==103
replace region2 = "Western Europe" if region==104
replace region2 = "Eastern Europe" if region==105
replace region2 = "Western and South Asia" if region==106
replace region2 = "Eastern Asia" if region==107
replace region2 = "North Africa" if region==108
replace region2 = "Other Africa" if region==109 | region==110
replace region2 = "Oceania" if region==111
replace region2 = "Unknown" if region2==""

*Generate cohort region identifier
egen group = group(cohort region2)

drop if rank=="Unknown"
generate top5 = rank=="1" | rank=="2" | rank=="3" | rank=="4" | rank=="5"

*Parse variables
keep provider_id year specialty total entropy* cohort top5 share

*Calculate entropy by provider category
*NOTE: For each year, we want the average entropy by graduation cohort (<1975,
*	1976-1985, 1986-1995, and 1996-2005), by specialty (All physicians, GPs,
*	Psychiatrists, and Other Medical), and by rank (Psychiatrists from Top 5 
*	schools).
encode cohort, gen(cohort2)
rename share new_share_total

*Save temporary file
tempfile temp
save `temp', replace

*All physicians

use `temp', clear

forvalues x = 1(1)4 {

preserve

keep if cohort2==`x'

*Collapse into national records
collapse (mean) entropy_all_global new_share_total [fw=total], by(year)
rename entropy_all_global e_pa_all_`x'
rename new_share_total new_share_total_all_`x'
tempfile all_`x'
save `all_`x'', replace

restore

}


*GPs

use `temp', clear

keep if specialty=="Primary Care"

forvalues x = 1(1)4 {

preserve

keep if cohort2==`x'

*Collapse into national records
collapse (mean) entropy_all_global new_share_total [fw=total], by(year)
rename entropy_all_global e_pa_gp_`x'
rename new_share_total new_share_total_gp_`x'

tempfile gp_`x'
save `gp_`x'', replace

restore

}

*Psychiatrists

use `temp', clear

keep if specialty=="Psychiatry"

forvalues x = 1(1)4 {

preserve

keep if cohort2==`x'

*Collapse into national records
collapse (mean) entropy_all_global new_share_total [fw=total], by(year)
rename entropy_all_global e_pa_psych_`x'
rename new_share_total new_share_total_psych_`x'

tempfile psych_`x'
save `psych_`x'', replace

restore

}

*Other medical

use `temp', clear

keep if specialty=="Other Medical"

forvalues x = 1(1)4 {

preserve

keep if cohort2==`x'

*Collapse into national records
collapse (mean) entropy_all_global new_share_total [fw=total], by(year)
rename entropy_all_global e_pa_omed_`x'
rename new_share_total new_share_total_omed_`x'


tempfile omed_`x'
save `omed_`x'', replace

restore

}

*Merge entropy files together

*All categories
use `all_1', clear
forvalues i=2(1)4 {
merge 1:1 year using `all_`i''
drop _merge
}

*Add in GP/Psych/Other medical categories
forvalues i=1(1)4 {
merge 1:1 year using `gp_`i''
drop _merge
merge 1:1 year using `psych_`i''
drop _merge
merge 1:1 year using `omed_`i''
drop _merge
}

*Reshape data
reshape long e_pa_all e_pa_gp e_pa_psych e_pa_omed new_share_total_all new_share_total_gp new_share_total_psych new_share_total_omed, i(year) j(cohort) str

*Replace cohort labels
replace cohort = substr(cohort,2,.)

*Fix cohort variable
rename cohort cohort2
generate cohort=""
replace cohort = "1975" if cohort2=="1"
replace cohort = "1976-1985" if cohort2=="2"
replace cohort = "1986-1995" if cohort2=="3"
replace cohort = "1996+" if cohort2=="4"
drop cohort2
order year cohort

order new_share_*, last
order e_pa_*, before(new_share_total_all)

*******************************************************************************
*PANEL B
*******************************************************************************
list cohort e_pa_all e_pa_gp e_pa_psych e_pa_omed

*******************************************************************************
*PANEL C
*******************************************************************************
list cohort new_share_total_all new_share_total_gp new_share_total_psych new_share_total_omed
